library(tidyverse)
library(patchwork)
library(ggrastr)

# Load the saved plotting objects into the global environment. The rest of
# this script uses `urls_df` without creating it, so it is presumably one of
# the objects stored in this file -- TODO confirm.
load("data/plotting_data.RData")

#' Plot URL-level scores against share counts for a single domain.
#'
#' Keeps the rows of `u` whose `domain` equals `source` and draws one
#' rasterized point per row (x = `url_score`, y = `n_shares` on a log10 axis),
#' with colour and alpha taken from `sig_level`. A dashed vertical line marks
#' the domain score. Optional reference curves show
#' `domain_score +/- 2.57 * domain_sd / sqrt(n_shares)` and the same band
#' widened by `m` on each side.
#'
#' @param u Data frame with columns `domain`, `n_shares`, `url_score`,
#'   `sig_level`, `domain_score`, `domain_sd` and `domain_shares`. The three
#'   `domain_*` columns are read from the first matching row only.
#' @param source Domain to plot; also used as the plot title.
#' @param not_sig_alpha,stat_sig_alpha,sub_sig_alpha Point alpha for the
#'   "Not Statistically Significant", "Statistically But Not Substantively
#'   Significant" and the two "Substantively Significantly ..." levels.
#' @param add_ci Draw the reference band around the domain score?
#' @param add_tost_ci Also draw the band widened by `m`? Only has an effect
#'   when `add_ci` is true as well.
#' @param point_size Point size passed to `geom_point()`.
#' @param m Margin added to the upper band and subtracted from the lower one.
#' @param subtitle_annotation If true, the subtitle reports the share of rows
#'   whose `sig_level` contains "Left" or "Right".
#' @param save_path Prefix pasted directly in front of `file_name` (no path
#'   separator is inserted).
#' @param file_name If `NULL` the plot is returned. Otherwise the plot is
#'   written to `<save_path><file_name>_bubbles.png` and `..._bubbles.pdf`
#'   (8 x 4).
#' @return The ggplot object when `file_name` is `NULL`; otherwise the value
#'   of the final `ggsave()` call.
viz_urls <- function(u,
                     source,
                     not_sig_alpha = .1,
                     stat_sig_alpha = .2,
                     sub_sig_alpha = 1,
                     add_ci = TRUE,
                     add_tost_ci = TRUE,
                     point_size = 0.1,
                     m = 0.1,
                     subtitle_annotation = TRUE,
                     save_path = NULL,
                     file_name = NULL) {
  # `.env$source` pins the comparison to the function argument even if `u`
  # happens to contain a column that is also called `source`.
  subset_df <- filter(u, domain == .env$source)
  if (nrow(subset_df) == 0) {
    stop("No rows in `u` with domain == \"", source, "\".", call. = FALSE)
  }
  domain_df <- head(subset_df, 1) # domain-only values only need one observation

  # min_shares must be at least 11 (minimum value across all platforms);
  # the upper end is capped at 100,000,000.
  min_shares <- max(min(subset_df$n_shares, na.rm = TRUE), 11)
  max_shares <- min(domain_df$domain_shares, 100000000)
  if (!is.finite(min_shares) || !is.finite(max_shares)) {
    stop("Cannot determine a finite share range for \"", source, "\".",
         call. = FALSE)
  }
  # range() orders the two bounds, so a domain whose `domain_shares` is below
  # `min_shares` still gets the interval between the two values.
  share_range <- range(min_shares, max_shares)

  domain_score <- domain_df$domain_score
  domain_sd <- domain_df$domain_sd

  # Evaluate the band only at share counts that actually occur in `u`
  # (assumed to be whole-number counts). This avoids materialising every
  # integer up to `max_shares` just to throw most of them away.
  # filter() also drops missing share counts.
  ci_df <- tibble(n_shares = unique(u$n_shares)) |>
    filter(n_shares >= share_range[1], n_shares <= share_range[2]) |>
    arrange(n_shares) |>
    mutate(
      ci.upper = domain_score + (2.57 * domain_sd / sqrt(n_shares)),
      ci.lower = domain_score - (2.57 * domain_sd / sqrt(n_shares))
    )
  # Band values outside the [-1, 1] score range are blanked out.
  ci_df$ci.upper[ci_df$ci.upper > 1] <- NA
  ci_df$ci.lower[ci_df$ci.lower < -1] <- NA

  # The widened band is derived from the already-trimmed band, so it is
  # missing wherever the inner band is missing.
  ci_df <- ci_df |>
    mutate(ci.upper.tost = ci.upper + m, ci.lower.tost = ci.lower - m)
  ci_df$ci.upper.tost[ci_df$ci.upper.tost > 1] <- NA
  ci_df$ci.lower.tost[ci_df$ci.lower.tost < -1] <- NA

  alphas <- c(
    "Not Statistically Significant" = not_sig_alpha,
    "Statistically But Not Substantively Significant" = stat_sig_alpha,
    "Substantively Significantly Left" = sub_sig_alpha,
    "Substantively Significantly Right" = sub_sig_alpha
  )
  colors <- c(
    "Not Statistically Significant" = "black",
    "Statistically But Not Substantively Significant" = "wheat3",
    "Substantively Significantly Left" = "dodgerblue",
    "Substantively Significantly Right" = "firebrick"
  )

  p <- ggplot() +
    rasterize(
      geom_point(
        data = subset_df,
        aes(
          x = url_score,
          y = n_shares,
          color = sig_level,
          alpha = sig_level
        ),
        size = point_size
      ),
      dpi = 300
    ) +
    scale_y_log10(labels = scales::label_log()) +
    geom_vline(
      data = domain_df,
      aes(xintercept = domain_score),
      lty = "dashed"
    ) +
    scale_alpha_manual(
      name = "Distinct from Domain Score?\n(99% Level)",
      values = alphas
    ) +
    guides(alpha = "none", color = "none") +
    labs(y = "Shares", x = "Audience Score", title = source) +
    theme_classic() +
    theme(
      text = element_text(family = "serif"),
      plot.title = element_text(size = 24, face = "bold")
    ) +
    coord_cartesian(xlim = c(-1, 1)) +
    scale_color_manual(values = colors)

  if (add_ci) {
    p <- p +
      geom_line(data = ci_df, aes(y = n_shares, x = ci.upper),
                size = 1, color = "black", alpha = 0.6,
                linetype = "longdash") +
      geom_line(data = ci_df, aes(y = n_shares, x = ci.lower),
                size = 1, color = "black", alpha = 0.6,
                linetype = "longdash")

    if (add_tost_ci) {
      p <- p +
        geom_line(data = ci_df, aes(y = n_shares, x = ci.upper.tost),
                  size = 1, color = "black", alpha = 0.6,
                  linetype = "dashed") +
        geom_line(data = ci_df, aes(y = n_shares, x = ci.lower.tost),
                  size = 1, color = "black", alpha = 0.6,
                  linetype = "dashed")
    }
  }

  if (subtitle_annotation) {
    prop <- scales::percent(mean(str_detect(subset_df$sig_level, "Left|Right")))
    p <- p + labs(subtitle = glue::glue("{prop} substantively distinct"))
  }

  if (is.null(file_name)) {
    return(p)
  }

  # Pass the plot explicitly: without `plot =`, ggsave() writes whatever plot
  # was displayed last, which is not necessarily the one built here.
  out_stem <- paste0(save_path, file_name, "_bubbles")
  ggsave(paste0(out_stem, ".png"), plot = p, width = 8, height = 4)
  ggsave(paste0(out_stem, ".pdf"), plot = p, width = 8, height = 4)
}

# Per-domain bubble plots based on the `sig_level` column already present in
# `urls_df`. Every panel uses the same point size and alpha settings, so the
# shared arguments live in one small wrapper.
plot_domain_bubbles <- function(domain) {
  viz_urls(
    urls_df,
    source = domain,
    point_size = 0.9,
    not_sig_alpha = .025,
    stat_sig_alpha = .15,
    sub_sig_alpha = .75
  )
}

wsj_bubbles <- plot_domain_bubbles("wsj.com")
nyt_bubbles <- plot_domain_bubbles("nytimes.com")
rt_bubbles <- plot_domain_bubbles("rt.com")
reason_bubbles <- plot_domain_bubbles("reason.com")
mediaite_bubbles <- plot_domain_bubbles("mediaite.com")
fox_bubbles <- plot_domain_bubbles("foxnews.com")
wapo_bubbles <- plot_domain_bubbles("washingtonpost.com")
breitbart_bubbles <- plot_domain_bubbles("breitbart.com")

# 2 x 3 grid: one row of three panels stacked on another.
selected_domains <- (nyt_bubbles + wsj_bubbles + mediaite_bubbles) /
  (fox_bubbles + rt_bubbles + reason_bubbles)

# `&` applies a theme to every panel of the patchwork, whereas `+` only
# touches part of it. The overrides below re-apply the serif font and title
# styling after theme_classic() resets each panel's theme.
select_plot <- selected_domains &
  theme_classic() &
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(size = 24, face = "bold"),
    plot.subtitle = element_text(size = 20, face = "plain"),
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18)
  )

# The same figure is written under two file names. Arguments are spelled out
# instead of relying on `file` partially matching `filename`.
ggsave("results/fig_4a.pdf",
       plot = select_plot, width = 12 * 1.5, height = 8 * 1.5)
ggsave("results/tw_selected_domains.pdf",
       plot = select_plot, width = 12 * 1.5, height = 8 * 1.5)


#' Translate short inference codes into the labels used for plotting.
#'
#' @param score Numeric score compared against `lower_ci`.
#' @param orig Inference code; the values handled are "null", "stat" and
#'   "sub".
#' @param lower_ci Threshold that splits "sub" cases: scores at or below it
#'   are labelled Left, scores above it Right.
#' @return Character vector of labels; `NA` for any other code, or for a
#'   "sub" case whose comparison with `lower_ci` is missing.
recode_sim_inference <- function(score, orig, lower_ci) {
  is_substantive <- orig == "sub"

  case_when(
    orig == "null" ~ "Not Statistically Significant",
    orig == "stat" ~ "Statistically But Not Substantively Significant",
    is_substantive & score <= lower_ci ~ "Substantively Significantly Left",
    is_substantive & score > lower_ci ~ "Substantively Significantly Right",
    TRUE ~ NA_character_
  )
}


# Replace the short codes in `inference` with the plotting labels and store
# them as a factor. recode_sim_inference() is built on case_when(), which is
# vectorised, so it is applied to the whole columns at once instead of being
# called once per row.
urls_df <- urls_df |>
  mutate(
    inference = as.factor(
      recode_sim_inference(url_score_continuous, inference, sub_lwr99)
    )
  )

# Same figure as above, but coloured by the recoded `inference` column:
# the existing `sig_level` is dropped and `inference` takes its name, which is
# the column viz_urls() reads. The reshaped data is built once and reused.
sim_urls_df <- urls_df |>
  select(-sig_level, sig_level = inference)

plot_domain_bubbles_sim <- function(domain) {
  viz_urls(
    sim_urls_df,
    source = domain,
    point_size = 0.9,
    not_sig_alpha = .025,
    stat_sig_alpha = .15,
    sub_sig_alpha = .75
  )
}

wsj_bubbles <- plot_domain_bubbles_sim("wsj.com")
nyt_bubbles <- plot_domain_bubbles_sim("nytimes.com")
rt_bubbles <- plot_domain_bubbles_sim("rt.com")
reason_bubbles <- plot_domain_bubbles_sim("reason.com")
mediaite_bubbles <- plot_domain_bubbles_sim("mediaite.com")
fox_bubbles <- plot_domain_bubbles_sim("foxnews.com")
wapo_bubbles <- plot_domain_bubbles_sim("washingtonpost.com")
breitbart_bubbles <- plot_domain_bubbles_sim("breitbart.com")

# 2 x 3 grid: one row of three panels stacked on another.
selected_domains <- (nyt_bubbles + wsj_bubbles + mediaite_bubbles) /
  (fox_bubbles + rt_bubbles + reason_bubbles)

select_plot <-
  selected_domains +
  plot_annotation(
    # title = "Twitter: Distributions of Stories Within Selected Domains",
    # subtitle = "Simulation approach",
    # caption = "Parametric confidence intervals shown for reference."
  ) &
  theme_classic() &
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(size = 24, face = "bold"),
    plot.subtitle = element_text(size = 20, face = "plain"),
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18)
  )

# The same figure is written under two file names. Arguments are spelled out
# instead of relying on `file` partially matching `filename`.
ggsave("results/fig_f1.pdf",
       plot = select_plot, width = 12 * 1.5, height = 8 * 1.5)
ggsave("results/tw_selected_domains_sim.pdf",
       plot = select_plot, width = 12 * 1.5, height = 8 * 1.5)
